---
title: "risk factor"
output: html_document
date: "2023-08-12"
---
# 1. 下载并加载R包
```{r}
#install.packages("tidyverse")
#install.packages("ggplot2")
library(tidyverse)
library(ggplot2)
```
# 2. 数据加载
```{R}
# Read the 2019 risk-factor export (first row holds the column names) and the
# list of locations to keep. The location list has no header row, so read.csv
# assigns default column names; its first column is referenced later as V1.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable names.
Risk_factor_2019 <- read.csv("./data/Risk_factor_2019.csv", header = TRUE)
order_globalandregions <- read.csv(
  "./data/order_globalandregions.csv",
  header = FALSE
)
```
# 3. 数据清洗(0. 了解数据;1. 行与列的筛选;2. 数据单位转化;3. 与可视化相关的变量内顺序调整)
```{r}
## Cleaning goals: three steps are needed -- (1) column/row filtering,
## (2) numeric formatting, (3) ordering of the plotted categories.

## 3.0 First look at which risk factors the raw data contains.
unique(Risk_factor_2019$rei_name)

## 3.1 Keep the 10 columns of interest (select) and the target rows (filter).
## Expected result: 22 locations x 8 risk factors = 176 rows.
Risk_factor_2019_DALYs_Male <- Risk_factor_2019 %>%
  select(
    "location_name", "year", "sex_name", "age_name", "measure_name",
    "metric_name", "rei_name", "val", "upper", "lower"
  ) %>%
  filter(
    # Only locations listed in the ordering file (global + regions).
    location_name %in% order_globalandregions$V1,
    year == "2019",
    sex_name == "Male",
    age_name == "All ages",
    measure_name == "DALYs (Disability-Adjusted Life Years)",
    metric_name == "Percent",
    rei_name %in% c(
      "Ambient particulate matter pollution",
      "Household air pollution from solid fuels",
      "Ambient ozone pollution",
      "Smoking",
      "Secondhand smoke",
      "Occupational particulate matter, gases, and fumes",
      "High temperature",
      "Low temperature"
    )
  )

## 3.2 Unit conversion with mutate: multiply by 100 and round to one decimal.
Risk_factor_2019_DALYs_Male <- Risk_factor_2019_DALYs_Male %>%
  mutate(
    val = round(val * 100, 1),
    # Scale `upper` in place so it stays on the same scale as `val` and
    # `lower`; previously only a separate `upper_1` column was scaled.
    upper = round(upper * 100, 1),
    # Kept for backward compatibility with code that reads `upper_1`.
    upper_1 = upper,
    lower = round(lower * 100, 1),
    # Wrap long risk-factor names to at most ~30 characters per line so they
    # fit in the facet strips. Use the bare column (data masking) rather than
    # reaching back into the data frame with `$`.
    rei_name = str_wrap(rei_name, width = 30)
  )

## 3.3 Fix the within-variable order used by the plot
## (location_name and rei_name).
Risk_factor_2019_DALYs_Male <- Risk_factor_2019_DALYs_Male %>%
  mutate(
    # The location list is reversed before being used as the level order.
    location_name = fct_relevel(
      location_name,
      rev(c(
        "Global",
        "High-income Asia Pacific",
        "High-income North America",
        "Western Europe",
        "Australasia",
        "Andean Latin America",
        "Tropical Latin America",
        "Central Latin America",
        "Southern Latin America",
        "Caribbean",
        "Central Europe",
        "Eastern Europe",
        "Central Asia",
        "North Africa and Middle East",
        "South Asia",
        "Southeast Asia",
        "East Asia",
        "Oceania",
        "Western Sub-Saharan Africa",
        "Eastern Sub-Saharan Africa",
        "Central Sub-Saharan Africa",
        "Southern Sub-Saharan Africa"
      ))
    ),
    # Level names must match the wrapped labels produced in step 3.2,
    # including the embedded line breaks.
    rei_name = fct_relevel(
      rei_name,
      c(
        "Smoking",
        "Ambient particulate matter\npollution",
        "Occupational particulate\nmatter, gases, and fumes",
        "Household air pollution from\nsolid fuels",
        "Secondhand smoke",
        "Ambient ozone pollution",
        "Low temperature",
        "High temperature"
      )
    )
  )

# The following is the old approach from Table 1; it does not fit the
# current use case and is kept only for reference.
### Sort rows (by location)
#### Step 1: build a factor (convert the vector to a factor) for use in step 2
#Order_location_factor <- factor(Risk_factor_2019_DALYs_Male$"location_name", levels = order_globalandregions$"V1")
#### Step 2: use order() to sort the data-frame rows by that factor
#Risk_factor_2019_DALYs_Male <- Risk_factor_2019_DALYs_Male[order(Order_location_factor), ]
### Sort rows (by risk factor)
#### Step 1: build a factor (convert the vector to a factor) for use in step 2
#Order_RF_factor <- factor(Risk_factor_2019_DALYs_Male$"rei_name", levels = c('Smoking','Ambient particulate matter pollution','Occupational particulate matter, gases, and fumes','Household air pollution from solid fuels', 'Secondhand smoke','Ambient ozone pollution','Low temperature', 'High temperature'))
#### Step 2: use order() to sort the data-frame rows by that factor
#Risk_factor_2019_DALYs_Male <- Risk_factor_2019_DALYs_Male[order(Order_RF_factor), ]
```
# 4. 可视化(Male)
```{R}
## 4.1 Preparation for the target figure: manually assigned fill colours,
## keyed by the wrapped risk-factor labels (line breaks included).
color_risk <- c(
  "Smoking" = "#2B24D6",
  "Ambient particulate matter\npollution" = "#AD1B2A",
  "Occupational particulate\nmatter, gases, and fumes" = "#006660",
  "Household air pollution from\nsolid fuels" = "#523104",
  "Secondhand smoke" = "#7E8E2B",
  "Ambient ozone pollution" = "#2396FD",
  "Low temperature" = "#B16184",
  "High temperature" = "#FBD9CD"
)

## 4.2 Build the figure: bars, value labels, flipped axes, one facet per risk
## factor, axis settings, and theme (background, grid lines, axis text and
## titles, facet strip background and text, legend).
p1 <- ggplot(
  data = Risk_factor_2019_DALYs_Male,
  aes(x = location_name)
) +
  # `linewidth` sets the bar outline width; `size` for line widths was
  # deprecated in ggplot2 3.4.0.
  geom_col(
    aes(y = val, fill = rei_name),
    colour = "black",
    width = 0.7,
    position = "dodge",
    linewidth = 0.3
  ) +
  # Disabled layer for adding error bars:
  # geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, position = position_dodge(0.7)) +
  # Apply the manual colours.
  scale_fill_manual(values = color_risk) +
  # Manually set the value-axis breaks and limits.
  scale_y_continuous(
    breaks = c(0, 20, 40, 60, 80),
    limits = c(0, 90),
    expand = c(0, 0)
  ) +
  # Swap the x and y axes so the bars are drawn horizontally.
  coord_flip() +
  # Text layer: print each value just past the end of its bar.
  geom_text(
    aes(label = val, y = val + 1),
    position = position_dodge(width = 0.7),
    vjust = 0.5,
    hjust = -0.1,
    size = 2
  ) +
  # Split the data into one small panel per risk factor, in a single row.
  facet_wrap(~rei_name, scales = "free_x", nrow = 1) +
  ylab("DALYs attributable to risk factors (%)") +
  xlab("GBD regions") +
  # One of several preset themes (others: theme_classic, theme_minimal,
  # theme_bw).
  theme_light() +
  theme(
    # element_rect() configures rectangular elements such as backgrounds.
    panel.background = element_rect(fill = "transparent"),
    # element_blank() hides the major and minor grid lines.
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_text(size = 6, colour = "black"),
    axis.title.x = element_text(size = 8, colour = "black"),
    axis.title.y = element_text(size = 8, colour = "black"),
    # strip.background and strip.text style the facet strips
    # (background and label text).
    strip.background = element_rect(fill = "#B6D1FA"),
    strip.text = element_text(colour = "black", size = 6, lineheight = 1),
    legend.position = "none"
  )
```
# 5. 文件导出
```{R}
# Write the figure to a 25 cm x 15 cm PDF. Arguments are named in full rather
# than relying on `file` partially matching `filename` and on the plot being
# picked up positionally. The output folder is created first because ggsave
# does not reliably create missing directories.
dir.create("./output", showWarnings = FALSE, recursive = TRUE)
ggsave(
  filename = "./output/risk_factor_male.pdf",
  plot = p1,
  units = "cm",
  height = 15,
  width = 25
)
```